In [1]:
import pandas as pd
import numpy as np
In [2]:
import pandas as pd

# Read the Excel file
df = pd.read_excel('Datasets/ElectricityByCounty.xlsx')

# Reshape the DataFrame
df = df.melt(id_vars=['County', 'Sector'],
             var_name='Timestamp', value_name='Yearly Data')

# Convert the "Timestamp" column to datetime format representing the year
df['Timestamp'] = pd.to_datetime(df['Timestamp'], format='%Y')

# Sort the DataFrame by County and Timestamp
df = df.sort_values(['County', 'Timestamp'])

# Print the resulting DataFrame
df_indexd = df.set_index('Timestamp')
df_indexd
Out[2]:
County Sector Yearly Data
Timestamp
1990-01-01 ALAMEDA Non-Residential 7109.299895
1990-01-01 ALAMEDA Residential 2498.265626
1990-01-01 ALAMEDA Total 9607.565521
1991-01-01 ALAMEDA Non-Residential 6809.573768
1991-01-01 ALAMEDA Residential 2515.209970
... ... ... ...
2020-01-01 YUBA Residential 251.861806
2020-01-01 YUBA Total 567.007959
2021-01-01 YUBA Non-Residential 315.822249
2021-01-01 YUBA Residential 260.500675
2021-01-01 YUBA Total 576.322924

5568 rows × 3 columns

In [3]:
import pandas as pd
import plotly.graph_objects as go
from statsmodels.tsa.arima.model import ARIMA
from dateutil.relativedelta import relativedelta
import numpy as np
from pmdarima import auto_arima

# Read the Excel file
df = pd.read_excel('Datasets/ElectricityByCounty.xlsx')

# Reshape the DataFrame
df = df.melt(id_vars=['County', 'Sector'],
             var_name='Timestamp', value_name='Yearly Data')

# Convert the "Timestamp" column to datetime format representing the year
df['Timestamp'] = pd.to_datetime(df['Timestamp'], format='%Y')

# Sort the DataFrame by County and Timestamp
df = df.sort_values(['County', 'Timestamp'])

# Iterate over each country and sector
for country in df['County'].unique():
    for sector in df['Sector'].unique():
        fig = go.Figure()

        # Get the energy consumption data for the current country and sector
        df_filter = df[(df['County'] == country) & (
            df['Sector'] == sector)][['Timestamp', 'Yearly Data']]
        df_filter_index = df_filter.set_index('Timestamp')

        # Prepare the data for modeling
        years = df_filter_index.index
        energy_consumption = df_filter_index.values.flatten()

        # Split the data into training and testing
        # Use all data except the last 5 years for training
        train_data = energy_consumption[:-5]
        test_data = energy_consumption[-5:]  # Use the last 5 years for testing

        # Fit the auto ARIMA model
        model = auto_arima(train_data, seasonal=False)
        model.fit(train_data)

        # Generate predictions
        predictions = model.predict(n_periods=len(test_data))

        # Plot the training data
        fig.add_trace(go.Scatter(
            x=years[:-5], y=train_data, mode='lines+markers', name='Training Data'))

        # Plot the predictions
        fig.add_trace(go.Scatter(
            x=years[-5:], y=test_data, mode='lines+markers', name='Testing Data'))
        fig.add_trace(go.Scatter(
            x=years[-5:], y=predictions, mode='lines+markers', name='Predictions'))

        # Update the layout
        fig.update_layout(title=f'Energy Consumption Forecast Country : {country} : Sector {sector} ',
                          xaxis_title='Year', yaxis_title='Energy Consumption')

        # Show the plot
        fig.show()